b54f58780d0393f3c02cdc8d23be33d5ec29b097,h2o-core/src/main/java/water/Model.java,Model,score0,#Chunk#number#number[]#number[]#,425
Before Change
// Copy this row's value from each of the first _names.length-1 chunks into tmp,
// score it via score0(tmp,preds), and, when both class distributions are set on
// a classifier, re-weight the returned class probabilities before returning.
assert chks.length>=_names.length; // Last chunk is for the response
for( int i=0; i<_names.length-1; i++ ) // Do not include the last value since it may contain a response
tmp[i] = chks[i].at0(row_in_chunk);
float[] scored = score0(tmp,preds);
// Correct probabilities obtained from training on oversampled data back to original distribution
// Cf. http://gking.harvard.edu/files/0s.pdf Eq.(27)
if (isClassifier() && _priorClassDist != null && _modelClassDist != null) {
// NOTE: the sanity checks below are Java asserts, so they only run when assertions are enabled (-ea).
assert(scored.length == nclasses()+1); //1 label + nclasses probs
double probsum=0;
// scored[0] holds the label, so class c's probability lives at scored[c] and
// its distribution entries at index c-1.
for( int c=1; c<scored.length; c++ ) {
final double original_fraction = _priorClassDist[c-1];
assert(original_fraction > 0);
final double oversampled_fraction = _modelClassDist[c-1];
assert(oversampled_fraction > 0);
assert(!Double.isNaN(scored[c]));
// Scale by the ratio of prior to model class fraction and accumulate the total for renormalization.
scored[c] *= original_fraction / oversampled_fraction;
probsum += scored[c];
}
// Renormalize the re-weighted probabilities by their sum.
// NOTE(review): probsum is not checked for zero before this division - confirm the scores cannot all be 0.
for (int i=1;i<scored.length;++i) scored[i] /= probsum;
//set label based on corrected probabilities (max value wins, with deterministic tie-breaking)
// NOTE(review): tmp (the row's input values) is passed as the second argument - presumably used for tie-breaking; verify against ModelUtils.getPrediction.
scored[0] = ModelUtils.getPrediction(scored, tmp);
}
return scored;
}
/** Subclasses implement the scoring logic. The data is pre-loaded into a
After Change
// Copy this row's value from each of the first _names.length-1 chunks into tmp,
// then delegate to score0(tmp,preds) and return its result unchanged.
assert chks.length>=_names.length; // Last chunk is for the response
for( int i=0; i<_names.length-1; i++ ) // Do not include the last value since it may contain a response
tmp[i] = chks[i].at0(row_in_chunk);
return score0(tmp,preds);
}
/** Subclasses implement the scoring logic. The data is pre-loaded into a